Content:

The data used in this project is of animal sightings on the trail cameras around the Duke forest and the spatial data of the Duke forest;it was collected as part of an ongoing project monitoring the white-tail deer population.

Hypotheses:

  1. Hypothesis 1:
    • H0: The time of day does not have an impact on observed deer
    • Ha: The time of day being dawn/dusk results in an increase of observed deer
  2. Hypothesis 2:
    • H0: Development has no impact on observed deer
    • Ha: An increase in development results in a decrease of observed deer
  3. Hypothesis 3:
    • H0: The phase of the moon has no impact on observed deer
    • Ha: The moon being full/gibbous results in an increase of observed deer
  4. Hypothesis 4:
    • H0: The phase of the moon has no effect on the time of day deer are observed
    • Ha: The moon being full/gibbous results in an increase of observed deer at dawn/dusk

Background:

To conduct a deer survey with trail cameras, one camera must be placed every 160 or so acres. (Source: Thomas Jr., L. (2012, April 19). How to run a trail-camera survey. Quality Deer Management Association.) The Duke forest is 7,000 acres.(Source: Duke University. (n.d.). Duke Forest – Teaching and Research Laboratory. Retrieved from https://dukeforest.duke.edu) There are 30 trail cameras in the Duke forest along known migration paths.

library(here)
here() #get working directory
## [1] "/Users/sophiemoyer/Documents/EDE-Final"
knitr::include_graphics(here("Images", "buck_day.jpg"))

Setup & Data Wrangling:

Importing Packages & Files

#import packages
library(tidyverse)
library(lubridate)
library(viridis)
library(rvest);
library(dataRetrieval)
library(dplyr)
library(readr)
library(stringr)
library(sf); 
library(mapview); mapviewOptions(fgb = FALSE)
library(RColorBrewer)
#install.packages("AICcmodavg")
library(AICcmodavg)
##install.packages("multcompView")
library(multcompView)

custom.theme <- function() {
  theme_minimal() + 
  theme(
    panel.background = element_rect(fill = "seashell"),
    panel.grid.major = element_line(colour = "bisque2", linetype = "dashed"),
    axis.line = element_line(colour = "black", size = 0.5),
    axis.text = element_text(size = 10, color = "salmon3", angle = 10),
    axis.title = element_text(size = 12, face = "bold", color = "salmon4"),
    plot.title = element_text(size = 16, color = "black", face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, color = "gray", hjust = 0.5),
    legend.text = element_text(size = 10, color = "salmon3"),
    legend.title = element_text(size = 12, color = "salmon4", face = "bold"),
    legend.position = "right"
  )
}
#read excel file 
trailcam_csv <- read.csv(here("Data", "Raw", "sequences.csv"))
moon_phases_csv <- read.csv(here("Data", "Raw", "moon_phases.csv"))

Creating Date & Time Objects

#date as objects
trailcam_csv$start_time <- ymd_hms(trailcam_csv$start_time)
trailcam_csv$end_time <- ymd_hms(trailcam_csv$end_time)
moon_phases_csv$start_date <- as.Date(moon_phases_csv$start_date)
#separate date and time
trailcam_csv$start_date <- as.Date(trailcam_csv$start_time)
trailcam_csv$start_time <- format(trailcam_csv$start_time, "%H")
trailcam_csv$month <- month(as.Date(trailcam_csv$start_date))
trailcam_csv$month_name <- month.name[trailcam_csv$month]

trailcam_csv$end_date <- as.Date(trailcam_csv$end_time)  
trailcam_csv$end_time <- format(trailcam_csv$end_time, "%H") 

Filtering Joining & Categorizing

#filter for white- tailed deer, select relevant columns, and isolate trail camera id number.
deer_data <- trailcam_csv %>%
  filter(common_name == "White-tailed Deer") %>%
  select(common_name, deployment_id, start_date, start_time, group_size, month, month_name) %>%
  mutate(cam_id = as.numeric(str_extract(deployment_id, "\\d+")),
                             division = str_extract(deployment_id, "\\((.*?)\\)"))
#cam_id refers to the camera id number
#mutate(cam_id = as.numeric(str_extract(deployment_id, "\\d+"))) taken from stackoverflow

#read file with trail camera coordinates (delivered as a .xlsx and converted to a .csv)
cam_coords <- read.csv(here("Data", "Raw", "camera_coords.csv"))

#Select id number and coordinates
cam_coordinates <- cam_coords %>%
  mutate(cam_id = as.numeric(str_extract(deployment_id, "\\d+")))
#join deer data with trail camera coordinate data to find where cameras are, and clean up data
deer_cam_data <- left_join(deer_data, cam_coordinates, by = "cam_id") %>%
  select(common_name, start_date, start_time, group_size, cam_id, division, longitude, latitude, month, month_name)
deer_cam_data <- left_join(deer_cam_data, moon_phases_csv, by = "start_date")

#categorize hours into groups
deer_cam_data$start_time <- as.numeric(deer_cam_data$start_time)
categorize_time <- function(hour) {
  ifelse(hour >= 6 & hour < 12, "Morning",
         ifelse(hour >= 12 & hour < 20, "Afternoon", "Evening"))
}
deer_cam_data$time_category <- cut(deer_cam_data$start_time, 
                                   breaks = c(-Inf, 5.99, 11.99, 19.99, Inf),
                                   labels = c("Evening", "Morning", "Afternoon", "Evening"),
                                   right = FALSE)

categorize_moon_phase <- function(phase) {
  phase_lower <- tolower(phase)
  if (grepl("full", phase_lower) || grepl("gibbous", phase_lower)) {
    return("FullAndGibbous")
  } else if (grepl("new", phase_lower) || grepl("crescent", phase_lower)) {
    return("NewAndCrescent")
  } else {
    return("QuarterMoon")
  }
}

deer_cam_data$moon_type <- sapply(deer_cam_data$moon_phase, categorize_moon_phase)

#read Duke forest boundary shapefile into project
#here("Duke University/Documents/EDE_Fall2023/DukeForest-TackRiosMoyer/duke-forest-spatial-data/duke-forest-spatial-data/Boundary")
#forest_sf <- st_read("Duke_Forest_Boundary_Mar2022.shp")
#mapview(forest_sf)

#adding a new column - categorical time of day 
deer_data <- deer_data %>% mutate(
    TOD = case_when(
      start_time %in% c(20, 21, 22, 23, "00", "01", "02", "03") ~ 'night ',
      start_time %in% c("04", "05", "06", "07", "08", "09", 10, 11) ~ 'morning',
      start_time %in% c(12, 13, 14, 15, 16, 17, 18, 19) ~ 'day',
      TRUE ~ NA_character_
    )
  )
#categorical variable for moon phase
deer_data <- inner_join(deer_data, moon_phases_csv, by = "start_date")

Coverting Spatial Data to Dataframe

#convert coordinates to a spatial dataframe
#deer_cam_data_sf <- deer_cam_data_sf %>% st_as_sf(coords = c("Longtitude","Latitude"), crs=4326)

Statistical Tests & Machine Learning:

Before performing any statistical tests, we should first look at the variables we hope to analyze to determine any initial observations about the relationship.

Understanding the Data

deer_data <- deer_data %>%
  arrange(month, start_time)

# Calculate sum of sightings by month and hour
deer_hours <- deer_data %>%
  group_by(month, start_time, month_name) %>%
  mutate(sightings = 1, .groups = 'drop') %>% 
  summarise(sightings = sum(sightings), .groups = 'drop')

# Create a line plot using ggplot2
sightings <- ggplot(deer_hours, aes(x = start_time, y = sightings, group = month, color = as.factor(month_name))) +
  geom_line() +
  labs(title = "Deer Sightings", x = "Hour",  y = "Sightings") +
  scale_color_discrete(name = "Month") + custom.theme() + facet_wrap(~ month_name, ncol = 1)

sightings

As the plot suggests, there are some peaks in deer sightings that can be observed over the course of a day. Most notably, in April there is a large spike of deer sightings in the early morning and evening. This plot connects with our first hypothesis: “The time of day has no effect on deer observations”. In the next section, we will select a statistical test to run on these variables to determine if there is any relationship present.


ANOVA Tests

#testing hypothesis one - time of day has no effect on deer observations
time.one.way <- aov(group_size ~ TOD, data = deer_data)
one.way.result <- summary(time.one.way)
timeone <- as.data.frame(one.way.result[[1]])
##knitr::kable(timeone, format = "html", digits = 3)
gt::gt(data = timeone) %>%  gt::tab_header(title = gt::md("**One-Way ANOVA Results**"), subtitle = gt::md("Determining relationship between *deer sightings and time of day*"))
One-Way ANOVA Results
Determining relationship between deer sightings and time of day
Df Sum Sq Mean Sq F value Pr(>F)
2 3.726723 1.8633613 3.929286 0.02003825
800 379.379130 0.4742239 NA NA
#testing hypothesis three - moon phase has no impact on deer observations
moon.one.way <- aov(group_size ~ moon_phase, data = deer_data)
moon.one <- summary(moon.one.way)
moontable <- as.data.frame(moon.one[[1]])
##knitr::kable(moontable, format = "html", digits = 3)
gt::gt(data = moontable) %>%  gt::tab_header(title = gt::md("**One-Way ANOVA Results**"), subtitle = gt::md("Determining relationship between *deer sightings and moonphase*"))
One-Way ANOVA Results
Determining relationship between deer sightings and moonphase
Df Sum Sq Mean Sq F value Pr(>F)
7 3.637784 0.5196834 1.088756 0.3683484
795 379.468069 0.4773183 NA NA
#moon and time test
moon.and.time <- aov(group_size ~ TOD + moon_phase, data = deer_data)
moontime <- summary(moon.and.time)
two.way.table <- as.data.frame(moontime[[1]])
##knitr::kable(two.way.table, format = "html", digits = 3)
gt::gt(data = two.way.table) %>%  gt::tab_header(title = gt::md("**Two-Way ANOVA Results**"), subtitle = gt::md("Determining relationship between *deer sightings and time of day/moonphase*"))
Two-Way ANOVA Results
Determining relationship between deer sightings and time of day/moonphase
Df Sum Sq Mean Sq F value Pr(>F)
2 3.726723 1.8633613 3.935447 0.01991973
7 3.908303 0.5583290 1.179199 0.31211213
793 375.470827 0.4734815 NA NA
#interaction test
moon.time <- aov(group_size ~ TOD*moon_phase, data = deer_data)
interaction <- summary(moon.time)
interactiontable <- as.data.frame(interaction[[1]])
##knitr::kable(interactiontable, format = "html", digits = 3)
gt::gt(data = interactiontable) %>%  gt::tab_header(title = gt::md("**Two-Way ANOVA with *Interactions* Results**"), subtitle = gt::md("Determining relationship between *deer sightings and time of day/moon phase*"))
Two-Way ANOVA with Interactions Results
Determining relationship between deer sightings and time of day/moon phase
Df Sum Sq Mean Sq F value Pr(>F)
2 3.726723 1.8633613 3.8893718 0.02085667
7 3.908303 0.5583290 1.1653935 0.32029660
14 2.259285 0.1613775 0.3368414 0.98913864
779 373.211542 0.4790906 NA NA
#maybe the division locations are adding a block?
moon.time.block <- aov(group_size ~ TOD*moon_phase + division, data = deer_data)
blocking <- summary(moon.time.block)
blockingtable <- as.data.frame(blocking[[1]])
##knitr::kable(blockingtable, format = "html", digits = 3)
gt::gt(data = blockingtable) %>%  gt::tab_header(title = gt::md("**Two-Way ANOVA with *Blocking* Results**"), subtitle = gt::md("Determining relationship between *deer sightings and time of day/moon phases* with division blocking"))
Two-Way ANOVA with Blocking Results
Determining relationship between deer sightings and time of day/moon phases with division blocking
Df Sum Sq Mean Sq F value Pr(>F)
2 3.7267226 1.86336128 3.8815298 0.02102028
7 3.9083030 0.55832900 1.1630437 0.32170415
2 0.1820533 0.09102663 0.1896157 0.82731525
14 2.2833399 0.16309571 0.3397413 0.98866368
777 373.0054343 0.48005847 NA NA

The one-way ANOVA (Analysis of Variance) results suggest that there is a statistically significant difference among the means of the groups defined by the variable TOD (Time of Day). Here’s how to interpret the output:

  1. Df (Degrees of Freedom): This indicates the degrees of freedom associated with the groups and residuals.
    • For TOD: 2 degrees of freedom.
    • For Residuals: 800 degrees of freedom.
  2. Sum Sq (Sum of Squares): This is a measure of the total variability in the data.
    • For TOD: 3.7
    • For Residuals: 379.4
  3. Mean Sq (Mean Sum of Squares): This is obtained by dividing the Sum of Squares by its corresponding degrees of freedom.
    • For TOD: 1.8634
    • For Residuals: 0.4742
  4. F value (F-statistic): This is the ratio of the variance among group means to the variance within the groups. It is calculated as the Mean Square for TOD divided by the Mean Square for Residuals.
    • In this case, the F value is 3.929.
  5. Pr(>F) (p-value): This is the probability of obtaining an F-statistic as extreme as, or more extreme than, the one observed in the data, assuming the null hypothesis is true.
    • In this case, the p-value is 0.02, which is less than the commonly used significance level of 0.05.

Interpretation: The p-value (Pr(>F)) is less than 0.05, indicating that there is evidence to reject the null hypothesis. Therefore, there is a statistically significant difference in means among the groups defined by TOD. However, the specific interpretation of which groups are different would require further post-hoc tests or examination of the group means.

It’s also worth noting that the effect size and the context of our study should be considered when interpreting the practical significance of the results.

Determining the Best ANOVA Model

model.set <- list(time.one.way, moon.and.time, moon.time, moon.time.block)
model.names <- c("one.way", "two.way", "interaction", "blocking")
aictab(model.set, modnames = model.names)
## 
## Model selection based on AICc:
## 
##              K    AICc Delta_AICc AICcWt Cum.Wt      LL
## one.way      4 1684.76       0.00   0.95   0.95 -838.36
## two.way     11 1690.73       5.97   0.05   1.00 -834.20
## interaction 25 1715.22      30.46   0.00   1.00 -831.77
## blocking    27 1719.06      34.30   0.00   1.00 -831.55
#based on the AIC, the one-way ANVOA test is the best model. The one.way test has 95% of the AIC weight - meaning it explains 95% of the total variation in the dependent variable (group_size)
plot(time.one.way)

tukey.one.way <- TukeyHSD(time.one.way)
tukey.one.way
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = group_size ~ TOD, data = deer_data)
## 
## $TOD
##                       diff        lwr         upr     p adj
## morning-day     0.09096293 -0.0441878  0.22611366 0.2547153
## night -day     -0.07540230 -0.2225017  0.07169713 0.4513530
## night -morning -0.16636523 -0.3073211 -0.02540937 0.0157434
plot(tukey.one.way, las = 1)

The results of the Tukey Honest Significant Difference (HSD) test provide insights into the pairwise differences between the levels of the TOD variable (Time of Day) with respect to the group_size variable. Let’s break down the output:

  1. diff: This column represents the differences in means between the levels of TOD.

  2. lwr and upr: These columns indicate the lower and upper bounds of the 95% confidence interval for the differences. If this interval includes zero, the difference is not considered statistically significant.

  3. p adj (adjusted p-value): This column shows the adjusted p-values after correcting for multiple comparisons.

Now, let’s interpret the results for each pairwise comparison:

  • morning vs. day:
    • Difference: 0.09096293
    • 95% Confidence Interval: [-0.0441878, 0.22611366]
    • p-value: 0.2547153
    • Interpretation: The difference is not statistically significant as the confidence interval includes zero, and the p-value is greater than 0.05.
  • night vs. day:
    • Difference: -0.07540230
    • 95% Confidence Interval: [-0.2225017, 0.07169713]
    • p-value: 0.4513530
    • Interpretation: The difference is not statistically significant as the confidence interval includes zero, and the p-value is greater than 0.05.
  • night vs. morning:
    • Difference: -0.16636523
    • 95% Confidence Interval: [-0.3073211, -0.02540937]
    • p-value: 0.0157434
    • Interpretation: The difference is statistically significant as the confidence interval does not include zero, and the p-value is less than 0.05. Therefore, there is evidence to suggest a significant difference in group_size between night and morning.

Interpretation: The Tukey HSD test suggests that the group_size differs significantly between night and morning, while there is no significant difference between morning and day or night and day. Keep in mind that the interpretation of p-values depends on the chosen significance level (commonly 0.05), and adjustments may be made for multiple comparisons.


Spatial Analysis & Maps:

SUBHEADING 1 HERE

#Bringing in Location data
#duke_forest<- st_read("/home/guest/module1/DukeForest-TackRiosMoyer/Data/Raw/Duke_Forest_Boundary_Mar2022.shp")

#converting camera points to locations
cameras.sf <- st_as_sf(cam_coordinates, coords = c("longitude","latitude"),
           crs=4326)

#wrangling to include number of deer sightings at each location

SUBHEADING 2 HERE

#Plotting Camera sites and number of dear sighings at each one 
mapview(cameras.sf,cex = 4, map.types="OpenStreetMap.Mapnik")
#mapview(cameras.sf,cex = "sightings",map.types="OpenStreetMap.Mapnik")

#plotting dear sighings at each camera trap for different times of the day 


Data Visualization & Plots:

Custom Theme for Plots

#custom theme to use for plots
custom.theme <- function() {
  theme_minimal() + 
  theme(
    panel.background = element_rect(fill = "seashell"),
    panel.grid.major = element_line(colour = "bisque2", linetype = "dashed"),
    axis.line = element_line(colour = "black", size = 0.5),
    axis.text = element_text(size = 10, color = "salmon3", angle = 10),
    axis.title = element_text(size = 12, face = "bold", color = "salmon4"),
    plot.title = element_text(size = 16, color = "black", face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, color = "gray", hjust = 0.5),
    legend.text = element_text(size = 10, color = "salmon3"),
    legend.title = element_text(size = 12, color = "salmon4", face = "bold"),
    legend.position = "right"
  )
}

Herd Size Scatter Plots

#scatter plot of herd size and time of day
scatter.deer.time.herd <- 
  ggplot(deer_cam_data, aes(x = start_time, y = group_size)) +
  geom_point() +
  geom_smooth(method = loess, color="black") +
  labs(
    title = "Herd Size Observed Based on Time of Day",
    x = "Time of Day (hour)",
    y = "Herd Size"
  ) +
  custom.theme()

print(scatter.deer.time.herd)

#insert image of phases of the moon for reference purposes
knitr::include_graphics(here("Images", "moon_phases.png"))

#scatter plot of herd size and moon phase
scatter.deer.moon.herd <- 
  ggplot(deer_cam_data, aes(x = moon_phase, y = group_size)) +
  geom_point() +
  labs(
    title = "Herd Size Observed Based on Moon Phase",
    x = "Moon Phase",
    y = "Group Size"
  ) +
  scale_x_discrete(labels = c("new moon", "waxing crescent", "first quarter", "waxing gibbous", "full moon", "waning gibbous", "third quarter", "waning crescent")) +
  scale_y_continuous(breaks = seq(0, ceiling(max(deer_cam_data$group_size)), by = 1)) +
  custom.theme()

#scale_x_discrete to get labels in order that I want them to be

print(scatter.deer.moon.herd)


Time of Day Scatter Plots

#scatter plot of moon phase and time of day
scatter.deer.moon.time <- 
  ggplot(deer_cam_data, aes(x = moon_phase, y = start_time)) +
  geom_point() +
  geom_smooth(method = lm, color="black") +
  labs(
    title = "Deer Observed Based on Moon Phase & Time of Day",
    x = "Moon Phase",
    y = "Time of Day (hour)"
  ) +
  custom.theme()

print(scatter.deer.moon.time)

#scatter plot of month and time of day, sorted by herd size
scatter.deer.time.month <- 
  ggplot(deer_cam_data, aes(x = start_date, y = start_time, color = group_size)) +
  geom_point() +
  geom_smooth(method = lm, color="black") +
  labs(
    title = "Deer Observed Based on Month & Time of Day",
    subtitle = "Categorized by Herd Size",
    x = "Month",
    y = "Time of Day (hour)",
    color = "Herd Size"
  ) +
  custom.theme()

print(scatter.deer.time.month)

scatter.deer.time.day.moon <- 
  ggplot(deer_cam_data, aes(x = start_date, y = moon_type, color = time_category)) +
  geom_point() +
  labs(
    title = "Deer Observed Based on Month & Moon Phase",
    subtitle = "Categorized by Time of Day",
    x = "Month by Date",
    y = "Moon Phase (type)",
    color = "Time of Day (type)"
  ) +
  custom.theme()

print(scatter.deer.time.day.moon)


Heatmaps

deer.moon.time.heatmap <- 
  ggplot(deer_cam_data, aes(x = moon_phase, y = start_time, fill = group_size)) +
  geom_tile() +
  scale_fill_gradient(low = "yellow", high = "red") +
  labs(
    title = "Deer Observed Based on Moon Phase & Time of Day",
    subtitle = "Characterized by Herd Size",
    x = "Moon Phase",
    y = "Time of Day (hour)",
    fill = "Herd Size"
  ) +
  custom.theme()

print(deer.moon.time.heatmap)

deer.cam.time.heatmap <- 
  ggplot(deer_cam_data, aes(x = start_time, y = cam_id, fill = group_size)) +
  geom_tile() +
  scale_fill_gradient(low = "yellow", high = "red") +
  labs(
    title = "Deer Observed Based on Time of Day & Camera",
    subtitle = "Characterized by Herd Size",
    x = "Time of Day (hour)",
    y = "Camera ID",
    fill = "Herd Size"
  ) +
  scale_x_continuous(breaks = seq(min(deer_cam_data$start_time), max(deer_cam_data$start_time), by = 2)) +
  scale_y_continuous(breaks = seq(min(deer_cam_data$cam_id), max(deer_cam_data$cam_id), by = 5)) +
  custom.theme()

print(deer.cam.time.heatmap)